knitr::opts_chunk$set(echo=T, out.width="100%")

1 Analyses and plots

Each year in November we do a series of time trials in which we have to row for maximum distance in a set amount of time. The first week starts at 2 minutes and the time is increased by 1 minute each week. For every time interval, each rower must complete 4 trials. This is the data from my rowvember’s from 2016 through 2018.

1.1 Total meters for Rowvember by year

Over the course of the month we complete 16 rows, so the meters really add up. Let’s take a look at how much rowing actually goes down.




Totals
year meters miles
2016 17321 10.76
2017 17373 10.8
2018 17563 10.91
total 52257 32.47




In the last 3 years I have rowed a total of 52257 meters! That’s 32.47 miles! One thing I was interested in was finding out if I was doing more work (rowing more distance) each year. Looking at the plot I see that I did indeed row more meters each year (the height of the colored dots increases). We can visualize the year to year improvement by subtracting the total meters of a given year from those of the previous year.





Year Improvement
16-17 52
17-18 190





This plot shows the difference in meters of total output from 2016 to 2017 and from 2017 to 2018. I’ve rowed more total meters every year. Specifically, I improved by 52 total meters from 2016 to 2017, and by 190(!) total meters from 2017 to 2018. Nice.



1.2 Total meters per time interval

Rowvember can get pretty challenging because it involves 4 different row times. For me, the 2-minute and 3-minute rows are a sprint, which I prefer, but the 3-minute row is torture. The 4 and 5-minute rows aren’t easy, but I find them to be less difficult (and less stressful) because I just focus on holding my pace. Here I look at the total distance rowed each year as a function of the time intverval (2, 3, 4, or 5 minutes).


Year
Row time 2016 2017 2018
2 2641 2642 2669
3 3733 3779 3893
4 4875 4914 4914
5 6072 6038 6087





It’s pretty clear that with each additional minute of row time the total distance increases (big surprise). What may be more interesting is seeing the difference in total meters rowed per time interval for each year, so, again, we can subtract the total distance rowed from that of the previous year, for each time interval.




Comparison
Row time 16-17 17-18
2 1 27
3 46 114
4 39 0
5 -34 49






Text here

1.3 Average meters per time interval

df_individual %>% 
    group_by(., rower, time) %>% 
    summarize(., avg_distance = mean(distance), sd_distance = sd(distance), 
        .groups = "drop") %>% 
    mutate_if(., is.numeric, round, digits = 2) %>% 
    datatable(.)

1.4 Fit model of meters as a function of time

## boundary (singular) fit: see ?isSingular
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: distance ~ (ot1 + ot2) + year + ((ot1 + ot2) | trial) + ((ot1 +  
##     ot2) | year)
##    Data: df_individual
## 
##      AIC      BIC   logLik deviance df.resid 
##    403.9    437.5   -183.9    367.9       30 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.5612 -0.4626 -0.1703  0.4846  2.7076 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr     
##  trial    (Intercept)   9.896   3.146            
##           ot1          57.202   7.563   1.00     
##           ot2           9.327   3.054   1.00 1.00
##  year     (Intercept)   0.000   0.000            
##           ot1          11.471   3.387    NaN     
##           ot2          48.298   6.950    NaN 1.00
##  Residual             103.357  10.166            
## Number of obs: 48, groups:  trial, 4; year, 3
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept) 1082.562      2.989 362.190
## ot1          634.168      5.171 122.643
## ot2            1.708      5.200   0.328
## year2017       3.250      3.594   0.904
## year2018      15.125      3.594   4.208
## 
## Correlation of Fixed Effects:
##          (Intr) ot1    ot2    yr2017
## ot1       0.385                     
## ot2       0.155  0.507              
## year2017 -0.601  0.000  0.000       
## year2018 -0.601  0.000  0.000  0.500
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular

1.5 Plots of meters as a function of time

# Plot avg distance for each time 
df_individual %>%
  ggplot(., aes(x = time, y = distance, color = as.factor(year), dodge = year)) + 
  geom_point(position = position_dodge(0.5), size = 4, alpha = 0.3) + 
  stat_summary(fun.data = mean_sdl, geom = 'pointrange', 
                         position = position_dodge(0.5)) + 
  stat_summary(fun = mean, geom = 'point', size = 2, 
                         position = position_dodge(0.5)) + 
  stat_summary(aes(y = fitted(fit)), fun = mean, geom = 'line') + 
  scale_x_continuous(breaks = c(2, 3, 4, 5), 
                     labels = c('2min', '3min', '4min', '5min')) +
    ylim(500, 1700) + 
  xlab('Time interval') + ylab('Average distance (m)') + 
  theme_bw()

# 3D plot 
with(df_individual, scatter3D(x = trial, y = time, z = distance, 
     colvar = distance, pch = 19, cex = 0.5, phi = 10, bty = 'g', 
     type = 'h', ticktype = "detailed", xlab = 'Trial', ylab = 'Time', 
     zlab = 'Distame (m)', zlim = c(0, 1800), xlim = c(0, 4.2), 
     ylim = c(1.8, 5.2), col = gg.col(100)))



















2 Group data

2.0.1 Total meters (females)

2.0.2 Total meters (males)

2.0.3 Total meters as a function of time

## group: female
## time: 2
## # A tibble: 10 × 4
##    athlete         group   time totalM
##    <chr>           <chr>  <dbl>  <dbl>
##  1 Greta Schneider female     2   2380
##  2 Amanda Green    female     2   2326
##  3 Mo Rabbitt      female     2   2180
##  4 Cris Rampolla   female     2   2130
##  5 Donna Borst     female     2   2101
##  6 Jackie Szabo    female     2   2098
##  7 Amanda Lehmkuhl female     2   1533
##  8 Alyssa Calvosa  female     2   1021
##  9 Sindy Rodriguez female     2    989
## 10 Sunita Algoo    female     2    987
## ------------------------------------------------------------ 
## group: male
## time: 2
## # A tibble: 10 × 4
##    athlete           group  time totalM
##    <chr>             <chr> <dbl>  <dbl>
##  1 Joseph Casillas   male      2   2641
##  2 Keith Sanchez     male      2   2556
##  3 Hamilton Villamor male      2   1933
##  4 Afam Moneme       male      2   1923
##  5 Michael Bernardi  male      2   1885
##  6 Ken Percy         male      2   1870
##  7 Don Borst         male      2   1851
##  8 Rhaman Johnson    male      2   1245
##  9 Adolf Soos        male      2   1243
## 10 Ashe Husein       male      2   1242
## ------------------------------------------------------------ 
## group: female
## time: 3
## # A tibble: 10 × 4
##    athlete           group   time totalM
##    <chr>             <chr>  <dbl>  <dbl>
##  1 Greta Schneider   female     3   3355
##  2 Amanda Green      female     3   3344
##  3 Mo Rabbitt        female     3   3216
##  4 Jackie Szabo      female     3   3106
##  5 Cris Rampolla     female     3   3077
##  6 Amanda Lehmkuhl   female     3   2942
##  7 Donna Borst       female     3   2914
##  8 Lisa Walsh        female     3   2237
##  9 Rachel Penick     female     3   1419
## 10 Alexandra Walczak female     3   1397
## ------------------------------------------------------------ 
## group: male
## time: 3
## # A tibble: 10 × 4
##    athlete          group  time totalM
##    <chr>            <chr> <dbl>  <dbl>
##  1 Joseph Casillas  male      3   3733
##  2 Afam Moneme      male      3   3636
##  3 Keith Sanchez    male      3   3614
##  4 Michael Bernardi male      3   3605
##  5 Adolf Soos       male      3   2709
##  6 Scott Penick     male      3   2609
##  7 Joseph Percy     male      3   2556
##  8 Steve Rampolla   male      3   2468
##  9 Ken Percy        male      3   1744
## 10 Mukund Murali    male      3   1676
## ------------------------------------------------------------ 
## group: female
## time: 4
## # A tibble: 10 × 4
##    athlete           group   time totalM
##    <chr>             <chr>  <dbl>  <dbl>
##  1 Greta Schneider   female     4   4429
##  2 Amanda Green      female     4   4391
##  3 Mo Rabbitt        female     4   4205
##  4 Jackie Szabo      female     4   4094
##  5 Cris Rampolla     female     4   4020
##  6 Lisa Walsh        female     4   3874
##  7 Amanda Lehmkuhl   female     4   3831
##  8 Donna Borst       female     4   3716
##  9 Erika Flowers     female     4   2822
## 10 Alexandra Walczak female     4   2736
## ------------------------------------------------------------ 
## group: male
## time: 4
## # A tibble: 10 × 4
##    athlete           group  time totalM
##    <chr>             <chr> <dbl>  <dbl>
##  1 Joseph Casillas   male      4   4875
##  2 Afam Moneme       male      4   4716
##  3 Michael Bernardi  male      4   4703
##  4 Keith Sanchez     male      4   4673
##  5 Joseph Percy      male      4   4368
##  6 Steve Rampolla    male      4   4300
##  7 Adolf Soos        male      4   2394
##  8 Hamilton Villamor male      4   2300
##  9 Matt Corrado      male      4   2135
## 10 Bill Reeder       male      4   2134
## ------------------------------------------------------------ 
## group: female
## time: 5
## # A tibble: 10 × 4
##    athlete           group   time totalM
##    <chr>             <chr>  <dbl>  <dbl>
##  1 Greta Schneider   female     5   5501
##  2 Amanda Green      female     5   5448
##  3 Mo Rabbitt        female     5   5140
##  4 Jackie Szabo      female     5   4987
##  5 Cris Rampolla     female     5   4933
##  6 Amanda Lehmkuhl   female     5   4801
##  7 Lisa Walsh        female     5   4526
##  8 Tara Arhakos      female     5   4381
##  9 Donna Borst       female     5   3575
## 10 Alexandra Walczak female     5   2104
## ------------------------------------------------------------ 
## group: male
## time: 5
## # A tibble: 10 × 4
##    athlete          group  time totalM
##    <chr>            <chr> <dbl>  <dbl>
##  1 Joseph Casillas  male      5   6072
##  2 Michael Bernardi male      5   5806
##  3 Afam Moneme      male      5   5742
##  4 Adolf Soos       male      5   5689
##  5 Keith Sanchez    male      5   5618
##  6 Joseph Percy     male      5   5447
##  7 Matt Corrado     male      5   5323
##  8 Ken Percy        male      5   4081
##  9 Bill Reeder      male      5   3940
## 10 Andrew Watt      male      5   2525

2.0.4 Avg. meters by gender

2.0.5 Plot of total meters for top 10 overall

2.0.6 Plot of top 10 in each trial

3 2019

dat2019 <- read_csv(here("data", "rowvember2019.csv")) %>% 
  mutate(time = hms(time) %>% as.duration())
## Rows: 64 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): rower, gender, date, time
## dbl (1): trial
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
dat2019 %>% 
  ggplot(., aes(x = gender, y = time, color = gender)) + 
    facet_wrap(~ trial, scales = "free_y") + 
    geom_text(aes(label = rower), show.legend = F, hjust = 0.5)

4 2021

dat2021 <- read_csv(here("data", "rowvember2021.csv")) %>% 
  mutate(time = hms(time) %>% as.duration())
## Rows: 77 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): rower, gender, date, time
## dbl (1): trial
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
dat2021 %>% 
  ggplot(., aes(x = gender, y = time, color = gender)) + 
    facet_wrap(~ trial, scales = "free_y") + 
    geom_text(aes(label = rower), show.legend = F, hjust = 0.5)

5 2019-2021 comparison

dat201921 <- bind_rows(dat2019, dat2021) %>% 
  separate(date, into = c("month", "day", "year"), sep = "/")
dat201921 %>% 
  mutate(is_casillas = 
    if_else(rower == "Joseph Casillas", "Casillas", "Other")) %>% 
  ggplot(., aes(x = year, y = time, fill = gender)) + 
    facet_wrap(. ~ trial, scales = "free_y", ncol = 4) + 
    geom_point(position = position_dodge(0.75), 
      size = 3, shape = 21, alpha = 0.5, color = "white") +
    stat_summary(fun.data = mean_se, geom = "pointrange", pch = 21, 
      position = position_dodge(0.75), size = 0.9) + 
    scale_fill_viridis_d(name = NULL, option = "A", begin = 0.4, end = 0.85) + 
   # scale_shape_manual(values = c(21, 24), guide = "none") + 
    labs(title = "Rowevember", 
      subtitle = "Time as a function of distance for 2019 and 2021", 
      y = "Time in seconds", x = NULL) + 
    ds4ling::ds4ling_bw_theme() + 
    theme(legend.position = "bottom")

dat201921 %>% 
  filter(rower != "Joseph Casillas") %>% 
  ggplot(., aes(x = year, y = time, fill = gender)) + 
    facet_wrap(. ~ trial, scales = "free_y", ncol = 4) + 
    geom_point(position = position_dodge(0.75), 
      size = 3, shape = 21, alpha = 0.5, color = "white") + 
    geom_point(data = filter(dat201921, rower == "Joseph Casillas"), 
      pch = 24, fill = "#cc0033", size = 3) + 
    stat_summary(fun.data = mean_se, geom = "pointrange", pch = 21, 
      position = position_dodge(0.75), size = 0.9) + 
    scale_fill_viridis_d(name = NULL, option = "A", begin = 0.4, end = 0.85) + 
    scale_shape_manual(values = c(21, 24), guide = "none") + 
    labs(title = "Rowevember", 
      subtitle = "Time as a function of distance for 2019 and 2021 (red triangle = Casillas)", 
      y = "Time in seconds", x = NULL) + 
    ds4ling::ds4ling_bw_theme(base_family = "Times") + 
    theme(legend.position = "bottom")

pace_to_time <- function(pace, distance = 500) {
  if (is.character(pace)) {
   stop("You messed up")
  } else {
  min <- strex::str_nth_number(as.character(pace), n = 1)
  sec <- strex::str_nth_number(as.character(pace), n = 2)
  min_to_sec <- min * 60
  time_in_secs <- min_to_sec + sec
  total_sec <- time_in_secs * distance / 500
  estimated_time <- lubridate::seconds_to_period(total_sec)
  return(estimated_time)
  }
}

pace_to_time(pace = c(ms("2:49"), ms("2:49")), distance = 5000)
## [1] "28M 10S" "28M 10S"
# Paces for 2021
#
# 1000 = 1:39
# 2000 = 1:41
# 3000 = 1:47
# 5000 = 1:51

# data frame of time course
time_course <- crossing(
  min = seq(0, 30, 1), 
  sec = seq(0, 59, 1)
  ) %>% 
  unite(min_sec, min, sec, sep = ":") %>% 
  mutate(min_sec = ms(min_sec))


estimates <- tibble(
  paces = seq(ms("1:40"), ms("1:58"), ms("0:02")), 
  pace_t = as.numeric(as.duration(ms(paces))), 
  distance = 5000
  ) %>% 
  crossing(., time_course) %>% 
  mutate(
    estimated_finish = pace_to_time(paces, distance), 
    progress = min_sec / estimated_finish) %>% 
  mutate(progress = if_else(progress >= 1, 1, .$progress), 
    time_course = as.duration(min_sec), 
    current_distance = (500 / pace_t) * as.numeric(min_sec), 
    paces = as.character(paces))

main_plot <- estimates %>% 
  ggplot(., aes(x = min_sec, y = progress)) + 
    geom_path(aes(color = paces)) + 
    labs(y = "Progress", x = "Elapsed time (hms)", 
      title = "Rowvember", subtitle = "5k pacing estimates") + 
    scale_color_viridis_d(name = "500m split", option = "C", 
      begin = 0.1, direction = -1) + 
    scale_x_time() + 
    hrbrthemes::scale_y_percent() + 
    coord_cartesian(xlim = c(NA, 1200), ylim = c(NA, 1.01)) + 
    ggdark::dark_theme_bw()
## Inverted geom defaults of fill and color/colour.
## To change them back, use invert_geom_defaults().
inset_plot <- main_plot + 
  coord_cartesian(ylim = c(0.8, 1.01), xlim = c(920, 1180)) + 
  scale_x_time(breaks = c(960, 1050, 1140)) + 
  labs(title = NULL, subtitle = NULL, y = NULL, x = NULL) + 
  theme(legend.position = "none")
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Add it as an inset
main_plot + annotation_custom(
  grob = ggplotGrob(inset_plot), 
  ymin = 0, ymax = 0.5, xmin = 595, xmax = 1250)

estimates %>% 
  filter(current_distance <= 5000) %>% 
  ggplot(., aes(x = progress, y = min_sec)) + 
    geom_path(aes(color = paces), size = 1) + 
    labs(x = "Progress", y = "Elapsed time (hms)", 
      title = "Rowvember", subtitle = "5k pacing estimates") + 
    scale_color_viridis_d(name = "500m split", option = "C", 
      begin = 0.1, direction = -1) + 
    scale_y_time() + 
    hrbrthemes::scale_x_percent() + 
    coord_cartesian(ylim = c(660, 1225), xlim = c(0.7, 1.01)) + 
    ggdark::dark_theme_bw()

pace_to_time(ms("2:00"), 5000)
## [1] "20M 0S"
k_estimates <- tibble(
  paces = seq(ms("2:10"), ms("2:26"), ms("0:02")), 
  pace_t = as.numeric(as.duration(ms(paces))), 
  distance = 5000
  ) %>% 
  crossing(., time_course) %>% 
  mutate(
    estimated_finish = pace_to_time(paces, distance), 
    progress = min_sec / estimated_finish) %>% 
  mutate(progress = if_else(progress >= 1, 1, .$progress), 
    time_course = as.duration(min_sec), 
    current_distance = (500 / pace_t) * as.numeric(min_sec), 
    paces = as.character(paces))

main_plot <- k_estimates %>% 
  ggplot(., aes(x = min_sec, y = progress)) + 
    geom_path(aes(color = paces)) + 
    labs(y = "Progress", x = "Elapsed time (hms)", 
      title = "Rowvember", subtitle = "5k pacing estimates") + 
    scale_color_viridis_d(name = "500m split", option = "C", 
      begin = 0.1, direction = -1) + 
    scale_x_time() + 
    hrbrthemes::scale_y_percent() + 
    coord_cartesian(xlim = c(NA, 1500), ylim = c(NA, 1.01)) + 
    ggdark::dark_theme_bw()

inset_plot <- main_plot + 
  coord_cartesian(ylim = c(0.85, 1.01), xlim = c(1260, 1460)) + 
  scale_x_time() + 
  labs(title = NULL, subtitle = NULL, y = NULL, x = NULL) + 
  theme(legend.position = "none")
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Add it as an inset
main_plot + annotation_custom(
  grob = ggplotGrob(inset_plot), 
  ymin = 0, ymax = 0.5, xmin = 750, xmax = 1550)